package Apache2::SolrSRUServer;

# Apache2::SolrSRUServer.pm - search a Solr/Lucene index via WebService::Solr

# Eric Lease Morgan <eric_morgan@infomotions.com>
# January 22, 2009 - first public distribution


# define
use constant SOLR => 'http://localhost:210/solr';
use constant MAX  => 100;

# require
use Apache2::Const -compile => qw( OK );
use CGI qw( -oldstyle_urls );
use CGI::Carp qw( fatalsToBrowser );
use CQL::Parser;
use SRU::Request;
use SRU::Response;
use strict;
use WebService::Solr;

sub handler {

	# initlize the necessary objects
	my $r        = shift;
	my $cgi      = CGI->new();
	my $request  = SRU::Request->newFromCGI( $cgi );
	my $response = SRU::Response->newFromRequest( $request );
	
	# check for type of response; explain
	if ($response->type eq 'explain') {
		
		# fill up the response's record 
		$response->record( SRU::Response::Record->new( recordSchema => 'http://explain.z3950.org/dtd/2.0/', 
		                                               recordData   => &record ));
		$response->stylesheet( 'explain.xsl' );

	}
	
	# scan
	elsif ($response->type eq 'scan') {
	
		$response->addDiagnostic( SRU::Response::Diagnostic->newFromCode( 4, 'Scan operation is not supported at this stage.' ));
		$response->asXML();
		
	}
	
	# search
	else {
		
		# parse the query and check it
		my $parser = CQL::Parser->new;
		eval { $parser->parse( $request->query ) };
		if ( $@ ) {
		
			$response->addDiagnostic( SRU::Response::Diagnostic->newFromCode( 10, $@ ));
			$r->content_type( 'text/xml' );
			$r->print( $response->asXML );
			return Apache2::Const::OK;
	
		 }
		 
		# i wish i understood this better
		my $node = $parser->parse( $request->query );
		
		# search
		my ( $total_hits, @results ) = &search( $node->toLucene, $request );
		
		# process each result
		for ( my $i = 0; $i <= $#results; $i++ ) {
	
			# check for maximum records
			if (defined($request->maximumRecords)) { last if ($i >= $request->maximumRecords) }
			
			# create a records object and add it to the responee
			my $record = SRU::Response::Record->new( recordSchema => 'info:srw/schema/1/dc-v1.1', recordData => $results[ $i ], recordPosition => $i + $request->startRecord );
			$response->addRecord( $record );
		
		}
		
		# include the total number of hits
		$response->numberOfRecords( $total_hits );
		
	}

	# done; output the result
	$r->content_type( 'text/xml' );
	$r->print( $response->asXML );
	return Apache2::Const::OK;

}

sub record {

	my $email = 'eric_morgan@infomotions.com';
	
	return <<EOF
<explain xmlns="http://explain.z3950.org/dtd/2.0/"><serverInfo protocol='SRU' version='1.1'><host>infomotions.com</host><port>80</port><database>sandbox/solr-sru/server.cgi</database></serverInfo><databaseInfo><title lang='en' primary='true'>Simple SRU server implemented with WebService::Solr</title><description lang='en' primary='true'>This is an index of American and English literature as well as Western philosophy.</description><author lang='en' primary='true'>Eric Lease Morgan, Infomotions, Inc.</author><contact lang='en' primary='true'>Eric Lease Morgan ($email)</contact><extent lang='en' primary='true'>The index contains content from the Alex Catalogue of Electronic Text, Project Gutenberg, the HaithiTrust, the Directory of Open Access Journals, Infomotions' Image Gallery, and probably some other stuff.</extent><langUsage lang='en' primary='true' codes='en'>All the records in this database are in English.</langUsage><implementation lang='en' primary='true'>This SRU server is written in Perl through the use of three API's. One for SRU (http://http://search.cpan.org/dist/SRU/). One for CQL (http://search.cpan.org/dist/CQL-Parser/), and one for the underlying indexer/search engine, WebServices::Solor (http://search.cpan.org/dist/WebService-Solr/).</implementation><links><link type='www'>http://infomotions.com/sandbox/solr-sru/index.html</link><link type='sru'>http://infomotions.com/sandbox/solr-sru/server.cgi</link></links></databaseInfo><metaInfo><dateModified>2009-01-17 18:12:04</dateModified></metaInfo><indexInfo><set identifier='info:srw/cql-context-set/1/dc-v1.1' name='dc' /><index><title>subject</title><map><name set='dc'>subject</name></map></index><index><title>related</title><map><name set='dc'>related</name></map></index><index><title>creator</title><map><name set='dc'>creator</name></map></index><index><title>publisher</title><map><name set='dc'>publisher</name></map></index><index><title>title</title><map><name set='dc'>title</name></map></index></indexInfo><schemaInfo><schema identifier='info:srw/schema/1/dc-v1.1' sort='false' name='dc'><title>Dublin Core</title></schema></schemaInfo><configInfo><default type='numberOfRecords'>100</default></configInfo></explain>
EOF

}

sub search {

	# initialize
	my $query   = shift;
	my $request = shift;
	my @results;
	
	# set up Solr
	my $solr = WebService::Solr->new( SOLR );
		
	# calculate start record and number of records
	my $start_record = 0;
	if ( $request->startRecord ) { $start_record = $request->startRecord - 1 }
	my $maximum_records = MAX; $maximum_records = $request->maximumRecords unless ( ! $request->maximumRecords );

	# search
	my $response   = $solr->search( $query, { 'start' => $start_record, 'rows' => $maximum_records });
	my @hits       = $response->docs;
	my $total_hits = $response->pager->total_entries;
	
	# display the number of hits
	if ( $total_hits ) {
	
		foreach my $doc ( @hits ) {
								
			# slurp
			my $id          = $doc->value_for(  'id' );
			my $name        = &escape_entities( $doc->value_for(  'title' ));
			my $publisher   = &escape_entities( $doc->value_for(  'publisher' ));
			my $description = &escape_entities( $doc->value_for(  'description' ));
			my @creator     = $doc->values_for( 'creator' );
			my $contributor = &escape_entities( $doc->value_for(  'contributor' ));
			my $url         = &escape_entities( $doc->value_for(  'url' ));
			my @subjects    = $doc->values_for( 'subject' );
			my $source      = &escape_entities( $doc->value_for(  'source' ));
			my $format      = &escape_entities( $doc->value_for(  'format' ));
			my $type        = &escape_entities( $doc->value_for(  'type' ));
			my $relation    = &escape_entities( $doc->value_for(  'relation' ));
			my $repository  = &escape_entities( $doc->value_for(  'repository' ));

			# full results, but included entities; hmmm...
			my $record  = '<srw_dc:dc xmlns="http://www.w3.org/TR/xhtml1/strict" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:srw_dc="info:srw/schema/1/dc-v1.1">';
			$record .= '<dc:title>' .  $name . '</dc:title>';
			$record .= '<dc:publisher>' .  $publisher . '</dc:publisher>';
			$record .= '<dc:identifier>' .  $url . '</dc:identifier>';
			$record .= '<dc:description>' .  $description . '</dc:description>';
			$record .= '<dc:source>' . $source . '</dc:source>';
			$record .= '<dc:format>' .  $format . '</dc:format>';
			$record .= '<dc:type>' .  $type . '</dc:type>';
			$record .= '<dc:contributor>' .   $contributor . '</dc:contributor>';
			$record .= '<dc:relation>' .   $relation . '</dc:relation>';
			foreach ( @creator ) { $record .= '<dc:creator>' .  $_ . '</dc:creator>' }
			foreach ( @subjects ) { $record .= '<dc:subject>' . $_ . '</dc:subject>' }
			$record .= '</srw_dc:dc>';
			push @results, $record;
						
		}
		
	}
	
	# done; return it
	return ( $total_hits, @results );
	
}


sub escape_entities {

	# get the input
	my $s = shift;
	
	# escape
	$s =~ s/&/&amp;/g;
	$s =~ s/</&lt;/g;
	$s =~ s/>/&gt;/g;
	$s =~ s/"/&quot;/g;
	$s =~ s/'/&apos;/g;

	# done
	return $s;
	
}

1;